Credential Scan on Azure Blob Storage.ipynb (589 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "source": [ "# Credential Scan on Azure Blob Storage\n", "\n", "__Notebook Version:__ 1.1<br>\n", "__Python Version:__ Python 3.10 - SDK v2<br>\n", "__Required Packages:__ No<br>\n", "__Platforms Supported:__ Azure Machine Learning Notebooks\n", " \n", "__Data Source Required:__ No \n", " \n", "### Description\n", "This notebook provides step-by-step instructions and sample code to detect credential leak into Azure Blob Storage using Azure SDK for Python.<br>\n", "*** No need to download and install any other Python modules. ***<br>\n", "*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". *** <br>\n", "\n", "## Table of Contents\n", "1. Warm-up\n", "2. Authentication to Azure Storage\n", "3. Scan Azure Blob for Leaking Credentials" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 1. Warm-up" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# If you need to know what Python modules are available, you may run this:\n", "# help(\"modules\")" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1617837106035 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } }, "tags": [ "parameters" ] } }, { "cell_type": "code", "source": [ "# Load Python libraries that will be used in this notebook\n", "from azure.mgmt.storage import StorageManagementClient\n", "from azure.identity import DefaultAzureCredential\n", "from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__\n", "from azure.mgmt.resource import ResourceManagementClient\n", "from azure.identity import AzureCliCredential\n", "\n", "import time\n", "import json\n", "import os\n", "import csv\n", "import ipywidgets\n", "from IPython.display import display, HTML, Markdown\n", "import 
re" ], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1717536760759 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Functions will be used in this notebook\n",
"def read_config_values(file_path):\n",
"    \"\"\"Load the pre-generated parameters for the Microsoft Sentinel workspace.\n",
"\n",
"    Args:\n",
"        file_path: Path of the JSON configuration file.\n",
"\n",
"    Returns:\n",
"        Tuple (tenant_id, subscription_id, resource_group, workspace_id,\n",
"        workspace_name, user_alias, user_object_id) read from the file.\n",
"\n",
"    Raises:\n",
"        KeyError: If one of the seven expected keys is missing.\n",
"    \"\"\"\n",
"    # open() either yields a usable file object or raises, so no truthiness check is needed\n",
"    with open(file_path) as json_file:\n",
"        json_config = json.load(json_file)\n",
"    return (json_config[\"tenant_id\"],\n",
"            json_config[\"subscription_id\"],\n",
"            json_config[\"resource_group\"],\n",
"            json_config[\"workspace_id\"],\n",
"            json_config[\"workspace_name\"],\n",
"            json_config[\"user_alias\"],\n",
"            json_config[\"user_object_id\"])\n",
"\n",
"def has_valid_token():\n",
"    \"\"\"Check whether the Azure CLI currently holds a valid AAD token.\n",
"\n",
"    Returns:\n",
"        None when the output of az account get-access-token shows no known problem,\n",
"        otherwise a message telling the user what to do next.\n",
"    \"\"\"\n",
"    try:\n",
"        error = \"Please run 'az login'\"\n",
"        expired = \"AADSTS70043: The refresh token has expired or is invalid\"\n",
"        failed = \"failed\"\n",
"        validator = !az account get-access-token\n",
"        # Fetch the captured CLI output once and reuse it for every check\n",
"        output_lines = validator.get_list()\n",
"\n",
"        if any(expired in item for item in output_lines):\n",
"            return '**The refresh token has expired. <br> Please continue your login process. Then: <br> 1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking \"Compute\" on left menu, then select the instance, clicking \"Restart\"; <br> 2. Otherwise, you may just restart the kernel from top menu. <br> Finally, close and re-load the notebook, then re-run cells one by one from the top.**'\n",
"        if any(error in item or failed in item for item in output_lines):\n",
"            return \"Please run 'az login' to setup account\"\n",
"        return None\n",
"    except Exception:\n",
"        # Best effort: any failure to query the CLI is reported as not being signed in.\n",
"        # Exception (not a bare except) lets KeyboardInterrupt still stop the cell.\n",
"        return \"Please login\"\n",
"\n",
"def get_file_content(blob):\n",
"    \"\"\"Return the content of a downloaded blob as text.\n",
"\n",
"    The bytes are read once and decoded as UTF-8, falling back to UTF-16, so the\n",
"    fallback never depends on reading an already consumed download a second time.\n",
"\n",
"    Args:\n",
"        blob: Downloader object exposing readall(); assumed to be what\n",
"            container_client.download_blob() returns - TODO confirm for other callers.\n",
"\n",
"    Raises:\n",
"        UnicodeDecodeError: If the content is neither valid UTF-8 nor valid UTF-16.\n",
"    \"\"\"\n",
"    raw_content = blob.readall()\n",
"    if isinstance(raw_content, str):\n",
"        # The downloader was created with an encoding and already decoded the data\n",
"        return raw_content\n",
"    try:\n",
"        return raw_content.decode('UTF-8')\n",
"    except UnicodeDecodeError:\n",
"        return raw_content.decode('UTF-16')\n",
"\n",
"def get_regex_list():\n",
"    \"\"\"Return the list of regular expression strings used by the credential scan.\"\"\"\n",
"    regex_list = [\n",
"        \"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\\\- ]?(key|secret)[^,a-z]|\\\\.azuredatabricks\\\\.net).{0,10}(dapi)?[a-z0-9/+]{22}\",\n",
"        \"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\\\.[a-z0-9/+]{40}[^a-z0-9/+])\",\n",
"        # AIza is matched case-sensitively and only the key body ignores case. The scoped (?i:...) group\n",
"        # replaces a global (?i) placed mid-pattern, which Python 3.11+ rejects with re.error.\n",
"        \"\\\\WAIza(?i:[a-z0-9_\\\\\\\\\\\\-]{35})\\\\W\",\n",
"        \"(?i)(\\\\Wsig\\\\W|Secret(Value)?|IssuerSecret|(\\\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\\\.azure\\\\-devices\\\\.net|\\\\.(core|servicebus|redis\\\\.cache|accesscontrol|mediaservices)\\\\.(windows\\\\.net|chinacloudapi\\\\.cn|cloudapi\\\\.de|usgovcloudapi\\\\.net)|New\\\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)\",\n",
"        \"(?i)visualstudio\\\\.com.{1,100}\\\\W(?-i:)[a-z2-7]{52}\\\\W\",\n",
"        \"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d\",\n",
"        \"(?i)(x-functions-key|ApiKey|Code=|\\\\.azurewebsites\\\\.net/api/).{0,100}[a-z0-9/\\\\+]{54}={2}\",\n",
"        \"(?i)code=[a-z0-9%]{54,74}(%3d){2}\",\n",
"        \"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\\\+]{60}\\\\W\",\n",
"        \"(?i)[^a-z0-9/\\\\+][a-z0-9/\\\\+]{86}==\",\n",
"        \"(?-i:)\\\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\\\-{5}\",\n",
"        \"(?i)(app(lication)?|client)[_\\\\- ]?(key(url)?|secret)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\-]\",\n",
"        \"(?i)refresh[_\\\\-]?token([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2})(\\\"data:text/plain,.+\\\"|[a-z0-9/+=_.-]{20,200})\",\n",
"        \"(?i)AccessToken(Secret)?([\\\\s\\\"':=|>\\\\]]{3,15}|[\\\"'=:\\\\(]{2}|[\\\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}\",\n",
"        \"(?i)[a-z0-9]{3,5}://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
"        \"(?i)snmp(\\\\-server)?\\\\.exe.{0,100}(priv|community)\",\n",
"        \"(?i)(ConvertTo\\\\-?SecureString\\\\s*((\\\\(|\\\\Wstring)\\\\s*)?['\\\"]+)\",\n",
"        \"(?i)(Consumer|api)[_\\\\- ]?(Secret|Key)([\\\\s=:>]{1,10}|[\\\\s\\\"':=|>,\\\\]]{3,15}|[\\\"'=:\\\\(]{2})[^\\\\s]{5,}\",\n",
"        \"(?i)authorization[,\\\\[:= \\\"']+([dbaohmnsv])\",\n",
"        \"(?i)-u\\\\s+.{2,100}-p\\\\s+[^\\\\-/]\",\n",
"        \"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\\\s\\\"'/][^:\\\\s\\\"'/\\\\$]+[^:\\\\s\\\"'/\\\\$%]:([^%\\\\s\\\"'/][^@\\\\s\\\"'/]{0,100}[^%\\\\s\\\"'/])@[\\\\$a-z0-9:\\\\.\\\\-_%\\\\?=/]+\",\n",
"        \"(?i)(\\\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\\\W[a-z0-9/\\\\+]{40}\",\n",
"        \"(?-i:)(eyJ0eXAiOiJKV1Qi|eyJhbGci)\",\n",
"        \"(?i)@(\\\\.(on)?)?microsoft\\\\.com[ -~\\\\s]{1,100}?(\\\\w?pass\\\\w?)\",\n",
"        \"(?i)net(\\\\.exe)?.{1,5}(user\\\\s+|share\\\\s+/user:|user-?secrets? set)\\\\s+[a-z0-9]\",\n",
"        \"(?i)xox[pbar]\\\\-[a-z0-9]\",\n",
"        \"(?i)[\\\":\\\\s=]((x?corp|extranet(test)?|ntdev)(\\\\.microsoft\\\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\\\W.{0,100}(password|\\\\Wpwd|\\\\Wpass|\\\\Wpw\\\\W|userpass)\",\n",
"        \"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\\\s]{0,100}?@([a-z0-9.]+\\\\.(on)?)?microsoft\\\\.com['\\\"]?\",\n",
"        \"(?i)(\\\\.database\\\\.azure\\\\.com|\\\\.database(\\\\.secure)?\\\\.windows\\\\.net|\\\\.cloudapp\\\\.net|\\\\.database\\\\.usgovcloudapi\\\\.net|\\\\.database\\\\.chinacloudapi\\\\.cn|\\\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\\\Wpwd\\\\W)\",\n",
"        \"(?i)(secret(.?key)?|password)[\\\"']?\\\\s*[:=]\\\\s*[\\\"'][^\\\\s]+?[\\\"']\",\n",
"        \"(?i)[^a-z\\\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\\\s?account)\\\\s*[^\\\\w\\\\s,]([ -~\\\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\\\s\\\\$]|\\\\s)\\\\s*(DB_PASS|(sql|service)?password|pwd)\",\n",
"        \"(?i)(password|secret(key)?)[ \\\\t]*[=:]+[ \\\\t]*([^:\\\\s\\\"';,<]{2,200})\",\n",
"    ]\n",
"\n",
"    return regex_list\n",
"\n",
"def set_continuation_flag(flag):\n",
"    \"\"\"Print a notice when flag equals False, then return flag unchanged.\"\"\"\n",
"    if flag == False:\n",
"        print(\"continuation flag is false.\")\n",
"    return flag\n",
"\n",
"def convert_result_to_string(result_row):\n",
"    \"\"\"Flatten one regex result into a single string.\n",
"\n",
"    Returns:\n",
"        The value itself for a str, the non-empty items joined with ',' for a tuple,\n",
"        and None for any other type.\n",
"    \"\"\"\n",
"    if isinstance(result_row, str):\n",
"        return result_row\n",
"    if isinstance(result_row, tuple):\n",
"        return ','.join(m for m in result_row if len(m) > 0)\n",
"    return None\n",
"\n",
"def export_csv(file_name, data_list):\n",
"    \"\"\"Write every string in data_list as one CSV row, split into columns on ','.\"\"\"\n",
"    # newline='' is required by the csv module; without it some platforms write blank rows.\n",
"    # UTF-8 keeps non-ASCII matches writable whatever the locale default is.\n",
"    with open(file_name, 'w', newline='', encoding='utf-8') as f:\n",
"        w = csv.writer(f, delimiter=',')\n",
"        w.writerows(x.split(',') for x in data_list)"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1717536825921 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": 
"code", "source": [
"# Calling the above function to populate Microsoft Sentinel workspace parameters\n",
"# The file, config.json, was generated by the system, however, you may modify the values, or manually set the variables\n",
"tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json')"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1717536861362 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 2. Authentication to Azure Storage" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Azure CLI is used to get device code to login into Azure, you need to copy the code and open the DeviceLogin site.\n",
"# You may add [--tenant $tenant_id] to the command\n",
"login_message = has_valid_token()\n",
"if login_message is not None:\n",
"    # Show the message has_valid_token() returned so the hint matches the actual problem\n",
"    display(Markdown(login_message))\n",
"    !echo -e '\\e[42m'\n",
"    !az login --tenant $tenant_id --use-device-code\n",
"\n",
"# Initializing Azure Storage and Azure Resource Python clients with one shared credential\n",
"azure_credential = AzureCliCredential()\n",
"storage_client = StorageManagementClient(azure_credential, subscription_id = subscription_id)\n",
"resource_client = ResourceManagementClient(azure_credential, subscription_id = subscription_id)\n",
"\n",
"# Set continuation_flag\n",
"# Constructing the clients never yields None, so request a management token to confirm the sign-in works\n",
"try:\n",
"    azure_credential.get_token('https://management.azure.com/.default')\n",
"except Exception as auth_error:\n",
"    print('Sign-in could not be verified: {0}'.format(auth_error))\n",
"    continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    continuation_flag = set_continuation_flag(True)\n",
"    print('Successfully signed in.')"
], "outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1717536881455 } } }, { "cell_type": "code", "source": [
"# Select Azure Resource Group\n",
"if continuation_flag:\n",
"    group_list = resource_client.resource_groups.list()\n",
"    group_dropdown = ipywidgets.Dropdown(options=sorted(g.name for g in group_list), description='Groups:')\n",
"    display(group_dropdown)"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1717536892127 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Select Azure Storage Account\n",
"if continuation_flag and group_dropdown.value is not None:\n",
"    # Only storage accounts of the selected resource group are offered\n",
"    resource_list = resource_client.resources.list_by_resource_group(\n",
"        group_dropdown.value,\n",
"        filter=\"resourceType eq 'Microsoft.Storage/storageAccounts'\",\n",
"    )\n",
"    storage_account_dropdown = ipywidgets.Dropdown(options=sorted(r.name for r in resource_list), description='Accounts:')\n",
"    display(storage_account_dropdown)\n",
"else:\n",
"    continuation_flag = set_continuation_flag(False)"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1717536908377 }, 
"jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 3. Scan Azure Blob for Leaking Credentials" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Select a blob container for a specified Azure Storage account\n",
"if continuation_flag and storage_account_dropdown.value is not None:\n",
"    storage_keys = storage_client.storage_accounts.list_keys(group_dropdown.value, storage_account_dropdown.value)\n",
"    if storage_keys is not None:\n",
"        # The account key named key1 is used as the credential for the blob service\n",
"        storage_key = {v.key_name: v.value for v in storage_keys.keys}['key1']\n",
"\n",
"        blob_service_client = BlobServiceClient(\n",
"            account_url=\"https://{0}.blob.core.windows.net\".format(storage_account_dropdown.value),\n",
"            credential=storage_key\n",
"        )\n",
"        if blob_service_client is not None:\n",
"            container_list = blob_service_client.list_containers()\n",
"            container_dropdown = ipywidgets.Dropdown(options=sorted(r.name for r in container_list), description='Containers:')\n",
"            display(container_dropdown)\n",
"        else:\n",
"            continuation_flag = set_continuation_flag(False)\n",
"    else:\n",
"        continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    continuation_flag = set_continuation_flag(False)"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1717536919230 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Select a blob from a specified blob container\n",
"if continuation_flag and container_dropdown.value is not None:\n",
"    container_client = blob_service_client.get_container_client(container_dropdown.value)\n",
"    if container_client is not None:\n",
"        blob_list = container_client.list_blobs()\n",
"        blob_dropdown = ipywidgets.Dropdown(options=sorted(r.name for r in blob_list), description='Blobs:')\n",
"        display(blob_dropdown)\n",
"    else:\n",
"        continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    continuation_flag = set_continuation_flag(False)"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685048298982 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Get blob content\n",
"if continuation_flag and blob_dropdown.value is not None:\n",
"    selected_blob = container_client.download_blob(blob_dropdown.value)\n",
"    if selected_blob is not None:\n",
"        content = get_file_content(selected_blob)\n",
"    else:\n",
"        continuation_flag = set_continuation_flag(False)\n",
"else:\n",
"    continuation_flag = set_continuation_flag(False)"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685048303926 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Run Regex strings on the file content\n",
"import warnings\n",
"# Only silence deprecation notices (for example about inline regex flags); other warnings stay visible\n",
"warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
"result_list = []\n",
"if continuation_flag and content is not None:\n",
"    has_leaking = False\n",
"    regex_list = get_regex_list()\n",
"    for regex in regex_list:\n",
"        try:\n",
"            pattern = re.compile(regex)\n",
"        except re.error as regex_error:\n",
"            # One pattern the running Python version rejects must not abort the whole scan\n",
"            print(\"Skipping invalid regex: \" + regex + \" (\" + str(regex_error) + \")\")\n",
"            continue\n",
"        # Keep the full matched text. findall() returns only the capture groups, which can be\n",
"        # empty or a single character even though the pattern matched a credential.\n",
"        results = [match.group(0) for match in pattern.finditer(content)]\n",
"        if results:\n",
"            print(\"================================================\")\n",
"            print(\"MATCHED REGEX:\\n\" + regex)\n",
"            print(\"------------------------------------------------\")\n",
"            print(\"FILE: \" + blob_dropdown.value + \"\\n\")\n",
"            print(\"---------------MATCHED CONTENT -----------------\")\n",
"            for result in results:\n",
"                print(result)\n",
"                result_list.append(result)\n",
"            print(\"================================================\")\n",
"            has_leaking = True\n",
"\n",
"    if not has_leaking:\n",
"        print('No leaking data found')\n",
"\n",
"else:\n",
"    continuation_flag = set_continuation_flag(False)"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685048306226 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [
"# Save results to a csv file in the current file system\n",
"if continuation_flag and len(result_list) > 0:\n",
"    export_csv(\"credscan_blob.csv\", result_list)\n",
"else:\n",
"    print(\"No data\")"
], "outputs": [], "execution_count": null, "metadata": { "collapsed": true, "gather": { "logged": 1685048314084 }, "jupyter": { "outputs_hidden": false, "source_hidden": false }, "nteract": { "transient": { "deleting": false } } } } ], "metadata": { "celltoolbar": "Tags", "kernel_info": { "name": "python310-sdkv2" }, "kernelspec": { "name": "python310-sdkv2", "language": "python", "display_name": "Python 3.10 - SDK v2" }, "language_info": { "name": "python", "version": "3.10.11", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "microsoft": { "host": { "AzureML": { "notebookHasBeenCompleted": true } }, "ms_spell_check": { "ms_spell_check_language": "en" } }, "nteract": { "version": "nteract-front-end@1.0.0" } }, "nbformat": 4, "nbformat_minor": 0 }